
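*-This folder contains two reference programs that can be used to further handle the label-value problem
* Options to be added (ok = done, ? = uncertain, blank = still open):
/*
- ok Fulllabel : extract all of the label information found in the description file;
- ok Save() : store the generated commands in a do-file; path and file name are chosen by the user;
- ok show a blue clickable link on screen that opens the saved file;
- ok nodisplay : do not show the generated code on screen
- ok lower: convert variable names to lower case
-    labelvalue : when specified, convert only the [number-to-text mapping], i.e. the value labels
- ? both: convert variable labels and the number-to-text mapping together (error-prone)
- do (onestep) : everything in one step -- run the generated commands right after conversion so that the variables get their labels;
*/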

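* Puzzle (unsolved): a free-text remark at the end of the value list is
* carried into the generated -label define- command and breaks its quoting.
* Input line:
* RelationtoComCode [出质方]     - 1=控股股东；5=董监高；6=其他；若有，逗号隔开
* Generated (invalid) command:
* label define  RelationtoComCode   1 "控股股东" 5 "董监高" 6 "其他" 若有多重关系，用半角逗号隔开" 
*-update: 2018/11/14 10:35
* Use infix to import the original txt document
* 

*-2018/11/28 15:26
* In TRD_Co[DES][xlsx].txt the label values are separated by commas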

*! Author: Yujun Lian (连玉君)
*! version 1.1
*! 2019/1/6 9:39

*-----------------------------------------------------------------------
* GTALabel -- generate -label var-, -label define- and -label value-
*             commands from a variable-description text file.
*
* Syntax
*   GTALabel txtfile [, Fulllabel Lower LABelvalue BOTH Compress
*                       NODisplay Save(filename) REPLACE DO]
*
* Input
*   txtfile   path of the description file. When the file name contains
*             no "[", the suffix "[DES][txt].txt" is appended to it.
*             Each line is expected to look like
*                 Varname [label text] - 1=first; 2=second
*
* Options
*   fulllabel   append the text that follows "]" to the variable label
*   lower       convert variable names to lower case
*   labelvalue  generate value-label commands only
*   both        generate variable-label and value-label commands
*               (default: variable labels only)
*   compress    use minimal padding and omit the section headers
*   nodisplay   do not list the generated commands on screen
*   save()      write the commands to a file; only .txt, .do and .md are
*               accepted, and .do is used when no extension is given
*   replace     allow save() to overwrite an existing file
*   do          accepted for compatibility; running the generated
*               commands is not implemented (see note near the end)
*
* Errors
*   601  description file not found
*   198  labelvalue and both combined, unsupported save() extension,
*        or save() target exists and replace was not specified
*
* The data in memory are preserved and restored.
*-----------------------------------------------------------------------
cap program drop GTALabel
program define GTALabel
version 15

syntax anything(id="txt file" name=filesource)  ///
     [, DO Fulllabel Lower Save(string asis) REPLACE Compress ///
        LABelvalue BOTH NODisplay]

preserve

* All intermediate files are real tempfiles: Stata erases them when the
* program ends, including every early -exit- below.
tempfile filesource_noBOM data0 dataLabel dataLabValue disDOonly

qui {     // qui begin

    clear

    // remove any double quotes from the file specification
    local filesource = ustrregexra(`"`filesource'"', `"""', "")

    // split into directory part (with trailing separator) and file name.
    // strrpos() and substr() both count bytes, so the position found by
    // one is valid for the other even when the path is not pure ASCII.
    local p1 = strrpos(`"`filesource'"', "\")
    local p2 = strrpos(`"`filesource'"', "/")
    local i  = max(`p1', `p2')

    local filepath      = substr(`"`filesource'"', 1, `i')
    local filename_full = substr(`"`filesource'"', `=`i'+1', .)

    // keep the part of the file name that precedes the first dot
    tokenize `"`filename_full'"', parse(.)
    local filename_full "`1'"

    if strpos(`"`filename_full'"', "[") > 0 {
        // name already carries the [DES][txt] style suffix: use as typed
        local filesource_temp `"`filesource'"'
    }
    else {
        // bare name: add the standard suffix and extension.
        // Built with a string expression so that a directory ending in a
        // backslash is never written directly in front of a macro.
        local filename_full `"`filename_full'[DES][txt]"'
        local filesource_temp = `"`filepath'"' + `"`filename_full'.txt"'
    }

    // check that the file exists
    if !fileexists(`"`filesource_temp'"') {
        di as error `"File `filesource_temp' could not be found"'
        exit 601
    }

    // -labelvalue- and -both- are mutually exclusive
    if "`labelvalue'" != "" & "`both'" != "" {
        dis as error "You can only specify one of -labelvalue- and -both-"
        exit 198
    }

    // -compress- option: padding placed before / inside each command
    if "`compress'" != "" {
        local blank1 ""
        local blank2 " "
    }
    else {
        local blank1 "  "
        local blank2 "  "
    }

    // drop the UTF-8 byte order mark (bytes EF BB BF), see
    //   https://en.wikipedia.org/wiki/Byte_order_mark
    filefilter `"`filesource_temp'"' `"`filesource_noBOM'"', from("\EFh\BBh\BFh") to("") replace

    // read the description file: one observation per line, in variable v
    import delimited using `"`filesource_noBOM'"', encoding(utf8) clear
    rename v1 v
    replace v = strltrim(v)      // remove leading blanks
    save `"`data0'"', replace


    // ---------------------------------------------- variable labels
    if "`labelvalue'" == "" {
        // s1 = variable name, s2 = text inside [ ], s3 = text after ]
        split v, p([ ]) gen(s)

        if "`lower'" != "" {
            replace s1 = lower(s1)
        }

        if "`fulllabel'" != "" {
            // s3 only exists when at least one line has text after "]"
            cap confirm variable s3, exact
            if !_rc {
                replace s2 = s2 + s3
            }
        }

        replace s2 = "`blank1'label var`blank2'" + s1 + `" ""' + s2 + `"""'

        keep s2

        if "`compress'" == "" {
            insobs 3, before(1)
            replace s2 = "*----------------" in 1
            replace s2 = "*-Label Variables" in 2
            replace s2 = "                 " in 3
        }

        save `"`dataLabel'"', replace       // Data 1: variable labels
    }


    // ---------------------------------------------- value labels
    if ("`labelvalue'" != "") | ("`both'" != "") {

        use `"`data0'"', clear

        // keep lines that define a value list: "... - 1=xxx"
        keep if regexm(v,"[0-9](=|＝)")
        keep if regexm(v," - [0-9]")

        // nothing to convert: warn and leave the program
        if _N == 0 {
            dis in red "Warning: No value-label defined in your txt file, e.g."
            dis in red "Varname [xxxx] - 1=第一类；2=第二类；3=第三类"
            dis in red "You can delete option -labelvalue- in your command"
            exit
        }

        replace v = subinstr(v, "-", "", 1)   // drop the first dash only
        split v, p([ ]) gen(s)

        if "`lower'" != "" {
            replace s1 = lower(s1)
        }

        // turn  1=a；2=b  into  1 "a" 2 "b"
        replace s3 = ustrregexra(s3,"(=|＝)",`" ""',.)
        replace s3 = ustrregexra(s3,"(；|;)",`"" "',.)
        replace s3 = subinstr(s3, "。", "",.)

        replace s3 = "`blank1'label define`blank2'" + s1 + s3 + `"""'

        // duplicate each line: 1st copy = label define, 2nd = label value
        gen id0 = _n
        expand 2
        sort id0
        bysort id0: gen id12 = _n
        replace s3 = "`blank1'label value `blank2'" + s1 + " " + s1 if id12==2

        keep s3
        rename s3 s2

        if "`compress'" == "" {
            insobs 4, before(1)
            replace s2 = "              " in 1
            replace s2 = "*-------------" in 2
            replace s2 = "*-Value labels" in 3
            replace s2 = "              " in 4
        }
        else {
            insobs 1, before(1)
            replace s2 = "              " in 1
        }

        save `"`dataLabValue'"', replace    // Data 2: value labels
    }


    // ---------------------------------------------- assemble the output
    if "`both'" != "" {
        use `"`dataLabel'"', clear
        append using `"`dataLabValue'"'
    }
    else if "`labelvalue'" != "" {
        use `"`dataLabValue'"', clear
    }
    else {
        use `"`dataLabel'"', clear
    }

    format s2 %-40s

    // write the commands to a temporary text file
    outfile using `"`disDOonly'"', noquote wide replace

    // list the commands on screen
    if "`nodisplay'" == "" {
        noi dis _n
        noi type `"`disDOonly'"', asis
    }

    // NOTE: option -do- is parsed but not acted upon. Running the generated
    // file directly was disabled by the author (2019/1/6) because the file
    // apparently still contained hidden characters.


    // ---------------------------------------------- save()
    // Only .txt, .do and .md are accepted; without an extension a .do
    // file is written.
    if `"`save'"' != "" {

        // remove any double quotes
        local save = subinstr(`"`save'"', `"""', "", .)

        // the extension is whatever follows the last dot of the file-name
        // part; dots inside directory names are ignored
        local s1 = strrpos(`"`save'"', "\")
        local s2 = strrpos(`"`save'"', "/")
        local sep     = max(`s1', `s2')
        local beg_dot = strrpos(`"`save'"', ".")

        if `beg_dot' > `sep' {
            local suffixname   = substr(`"`save'"', `=`beg_dot'+1', .)
            local strippedname = substr(`"`save'"', 1, `=`beg_dot'-1')
            if ~inlist("`suffixname'","txt","do","md") {
                noi dis in red "Only [ .txt, .do, .md ] files are supported by {opt save()}"
                exit 198
            }
        }
        else {
            // no extension given: default to a do-file
            local strippedname `"`save'"'
            local suffixname "do"
        }

        // final target name; built before the existence check so that the
        // check looks at the file that will actually be written
        local save `"`strippedname'.`suffixname'"'

        cap confirm file `"`save'"'
        if !_rc & "`replace'" ~= "replace" {
            noi di in red `"`save' already exists; specify {opt replace}"'
            exit 198
        }

        // export file
        outfile using `"`save'"', noquote wide replace

        // clickable link that opens the saved file
        if inlist("`suffixname'","txt","md") {
            noi dis `"{stata `" view "`save'" "' : open}"'
        }
        else {
            noi dis `"{stata `" doedit "`save'" "' : open}"'
        }

        // clickable link to the folder holding the saved file
        local filepath = substr(`"`save'"', 1, `sep')

        if `sep' != 0 {
            noi di `"{browse `"`filepath'"': dir}"'
        }
        else {
            noi di `"{browse `"`c(pwd)'"': dir}"'
        }
    }

}  // qui over

restore

end 
 

exit 
* NOTE: the file-level -exit- above stops Stata from reading any further,
* so nothing below this line is ever executed. It is kept only as a record
* of earlier experiments.
 
 
 
 
*--------------------------------------out of use-------------- 
* Earlier draft of the conversion logic. It is not a complete program:
* it contains a closing brace and an -end- without matching openers, and
* it refers to macros that are never defined here.
 
 
   local lower "lower"                   // option lower
   local lower ""
   local fulllabel ""
   local fulllabel "full"
   

   
 global file "test[DES][txt].txt"  
 local file "$file"  
 
   * read the description file as one long string per line
   infix strL v 1-1000 using `"`file'"', clear

  
 save data0, replace 
   
   * split each line on "[" and "]" into s1, s2, s3
   split v, p([ ]) gen(s)
   
   if "`lower'" != ""{
      replace s1 = lower(s1)
   }  
   
   if "`fulllabel'" != ""{  // fulllabel
      replace s2 = s2+s3
   }
   
   replace s2 = "  label var  " + s1 + `" ""' + s2 + `"""'  

   
*-value label
   
   * NOTE(review): macro nolabelvalue is never set in this draft
   if "`nolabelvalue'" == ""{
    	
       * NOTE(review): local data0 is not defined in this draft; the next
       * -use- loads the data0 file saved above
       use "`data0'", clear 
       
       use data0, clear
	   
	   keep if regexm(v,"[0-9](=|＝)") // with label value
		
       replace v = subinstr(v, "-", "", 1)
       split v, p([ ]) gen(s)
    	
	   if "`lower'" != ""{
          replace s1 = lower(s1)
       }  

	   replace s3 = ustrregexra(s3,"(=|＝)",`" ""',.)
	   replace s3 = ustrregexra(s3,"(；|;)",`" ""',.)
       replace s3 = subinstr(s3, "。", "",.)

       replace s3 = "  label define " + s1 + s3 + `"""'
	   
	   * duplicate every line; the second copy becomes the -label value- command
	   gen id0= _n
	   expand 2 
	   sort id0
	   bysort id0: gen id12 = _n
	   replace s3 = "  label value " + s1 + " " + s1 if id12==2
    	 
 	 
	   list s3, clean noobs compress
	   
	   tempname dataLabValue
	   save "`dataLabValue'.dta", replace  //to be appended
   }


   
   
   * NOTE(review): closing brace without a matching opening brace
 }	
	 
 *-export txt file
   
   format s3 %-40s
   
   * output file stem: the part of the file name before the first "["
   if strpos("`file'", "[")>0{
     *local ff = subinstr("`file'", "[", "_", .)
     *local ff = subinstr("`ff'", "]", "_", .)
	 tokenize "`file'", parse([)
	 local ff "`1'"
   }
   else{
     local ff "`file'"
   }
   
   local vorder "a1a s1a a2 s2"
   * NOTE(review): local x is only defined further down
   order `x'

   qui outfile `vorder' using "`ff'_label_temp.raw", noquote replace wide
   
   filefilter `ff'_label_temp.raw `ff'_label.raw, ///
              from("           ") to(" ") replace
			  
   erase `ff'_label_temp.raw
   
   cap erase `ff'_label.do
   
   shell rename `ff'_label.raw `ff'_label.do
    
   doedit `ff'_label.do
   
   *-list the results on the screen
     local x "a1 v1 s1 a2 s2"
     order `x'
     list  `x', clean noobs noheader  //list the results
	 dis _n
	 dis _n "  *-------------------------------"
	 dis    "  *-定义-`file'-文件的变量标签"
	 dis    "    do `ff'_label.do //定义变量标签的命令"	 
	 

 *}
    
	local x "s3"
    qui outfile `x' using "`ff'_labelvalue_temp.raw", noquote replace wide
   
   filefilter `ff'_labelvalue_temp.raw `ff'_labelvalue.raw, ///
              from("^^") to("\n") replace
			  
   erase `ff'_labelvalue_temp.raw
   
   cap erase `ff'_label.do
   
   shell rename `ff'_labelvalue.raw `ff'_labelvalue.do
    
   doedit `ff'_labelvalue.do 
   
	list s3, clean noobs noheader		 
	 
	
	 
   *restore
   
   * NOTE(review): -end- without a matching -program define-
 end
 
 
 
 
 
 
 
 
*-Temporary scratch code
* Unreachable fragment (it follows the file-level -exit-). It sketches two
* ways of building the -label var- text, chosen by macro fulllabel.
 
 
 
 
 
   if "`fulllabel'" != ""{  // fulllabel
     * split on "[" only and put "[" back in front of the quoted second piece
     split v, p("[") gen(s)
     replace s2 = `"""'+"["+s2+`"""'
     gen s0="  label var  "
     drop s
     order s0 s1 s2
     gen s = s0+s1+"	"+s2    
  }
  else{
     * split on "[" and "]" and quote only the text found between them
     split v, p([ ]) gen(s)
     gen v1 = "  label var"
     if "`lower'" != ""{
       replace s1 = lower(s1)
     }
     gen s1a = v1 + "  " + s1   
     replace s2 = `"""' + s2 + `"""'      
  }
   

